% Ho, Gupta, Ermon (2016): conference paper; the url field points at the arXiv listing.
@inproceedings{ho2016model,
  author    = {Ho, Jonathan and Gupta, Jayesh and Ermon, Stefano},
  title     = {Model-free imitation learning with policy optimization},
  booktitle = {International Conference on Machine Learning},
  year      = {2016},
  pages     = {2760--2769},
  url       = {https://arxiv.org/abs/1605.08478}
}

% Williams (1992): journal article, Machine Learning volume 8, issues 3--4.
% The journal name is stored in its proper capitalisation because styles print
% it verbatim; numeric ranges use a double hyphen so they typeset as an en dash.
@article{williams1992simple,
  author    = {Williams, Ronald J.},
  title     = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  journal   = {Machine Learning},
  volume    = {8},
  number    = {3--4},
  pages     = {229--256},
  year      = {1992},
  publisher = {Springer}
}

% Rubinstein and Kroese (2004): a Springer book, so it is recorded as a book
% entry with series / publisher / address in their own fields instead of being
% packed into a journal string. "Monte Carlo" is braced to survive recasing.
% NOTE(review): the title wording of this book varies between catalogue
% listings -- confirm against the publisher record.
@book{rubinstein2004cross,
  author    = {Rubinstein, Reuven Y. and Kroese, Dirk P.},
  title     = {The cross-entropy method: A unified approach to {Monte Carlo} simulation, randomized optimization and machine learning},
  series    = {Information Science and Statistics},
  publisher = {Springer-Verlag},
  address   = {New York, NY},
  year      = {2004}
}

% Duan et al. (2016): arXiv preprint 1611.02779.
% The superscript in the title is plain math (a text accent macro is not valid
% inside math mode) and the whole token is braced so styles do not recase it.
@article{duan2016rl,
  author  = {Duan, Yan and Schulman, John and Chen, Xi and Bartlett, Peter L. and Sutskever, Ilya and Abbeel, Pieter},
  title   = {{RL$^2$}: Fast reinforcement learning via slow reinforcement learning},
  journal = {arXiv preprint arXiv:1611.02779},
  year    = {2016}
}

% Haarnoja et al. (2018): arXiv preprint 1812.05905.
% The author list is intentionally truncated with the "and others" token, which
% the bibliography style renders as et al.
@article{haarnoja2018soft,
  author  = {Haarnoja, Tuomas and Zhou, Aurick and Hartikainen, Kristian and Tucker, George and Ha, Sehoon and Tan, Jie and Kumar, Vikash and Zhu, Henry and Gupta, Abhishek and Abbeel, Pieter and others},
  title   = {Soft actor-critic algorithms and applications},
  journal = {arXiv preprint arXiv:1812.05905},
  year    = {2018}
}

% Rakelly et al. (2019): arXiv preprint 1903.08254.
@article{rakelly2019efficient,
  author  = {Kate Rakelly and Aurick Zhou and Deirdre Quillen and Chelsea Finn and Sergey Levine},
  title   = {Efficient Off-Policy Meta-Reinforcement Learning via Probabilistic Context Variables},
  journal = {arXiv preprint arXiv:1903.08254},
  year    = {2019}
}

% Schulman et al. (2017): arXiv preprint 1707.06347.
@article{schulman2017proximal,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017}
}

% Levine (2018): single-author tutorial/review, arXiv preprint 1805.00909.
@article{levine2018reinforcement,
  author  = {Levine, Sergey},
  title   = {Reinforcement learning and control as probabilistic inference: Tutorial and review},
  journal = {arXiv preprint arXiv:1805.00909},
  year    = {2018}
}

% Schulman et al. (2015): arXiv preprint 1502.05477.
% The journal string follows the same "arXiv preprint arXiv:NNNN" convention as
% the other preprints in this file so classic styles print the identifier; the
% eprint field is kept and paired with archivePrefix for eprint-aware styles.
@article{schulman2015trust,
  author        = {Schulman, John and Levine, Sergey and Moritz, Philipp and Jordan, Michael I. and Abbeel, Pieter},
  title         = {Trust region policy optimization},
  journal       = {arXiv preprint arXiv:1502.05477},
  year          = {2015},
  eprint        = {1502.05477},
  archivePrefix = {arXiv}
}

% Fujimoto, van Hoof, Meger (2018): arXiv preprint 1802.09477 (identifier taken
% from the url field). The journal string follows the file-wide
% "arXiv preprint arXiv:NNNN" convention; the citation key is left unchanged so
% existing citations keep resolving.
@article{Fujimoto2018AddressingFA,
  author  = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  title   = {Addressing Function Approximation Error in Actor-Critic Methods},
  journal = {arXiv preprint arXiv:1802.09477},
  year    = {2018},
  url     = {https://arxiv.org/abs/1802.09477}
}

% Yu et al. (2019): arXiv preprint 1910.10897.
% "Meta-World" is a proper name and is braced so sentence-casing styles keep its
% capitals; the journal string follows the file-wide arXiv convention.
@article{yu2019metaworld,
  author  = {Yu, Tianhe and Quillen, Deirdre and He, Zhanpeng and Julian, Ryan and Hausman, Karol and Finn, Chelsea and Levine, Sergey},
  title   = {{Meta-World}: A Benchmark and Evaluation for Multi-Task and Meta Reinforcement Learning},
  journal = {arXiv preprint arXiv:1910.10897},
  year    = {2019}
}

% Lillicrap et al. (2015): arXiv preprint 1509.02971.
% Middle initials carry their periods so full-name styles do not print bare
% letters such as "Timothy P Lillicrap".
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title   = {Continuous control with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1509.02971},
  year    = {2015}
}

% Peters and Schaal (2007): paper in the 2007 IEEE symposium proceedings.
% Page range uses a double hyphen (en dash); empty volume/number fields are
% omitted because an empty field is treated the same as a missing one.
@inproceedings{peters2007reward,
  author    = {Peters, J. and Schaal, S.},
  title     = {Using Reward-weighted Regression for Reinforcement Learning of Task Space Control},
  booktitle = {2007 {IEEE} International Symposium on Approximate Dynamic Programming and Reinforcement Learning},
  year      = {2007},
  pages     = {262--267}
}

% Kober and Peters (2009): paper in the proceedings volume "Advances in Neural
% Information Processing Systems 21" (conference held 2008, volume dated 2009),
% so it is an inproceedings entry with the venue in booktitle.
% Editors are listed in the standard "editor" field, separated by "and".
% Affiliation metadata from the original export is kept in an ignored
% internal-note field: as "organization" it would be printed as the
% proceedings sponsor.
@inproceedings{2009koberpolicy,
  author        = {Kober, J. and Peters, J.},
  title         = {Policy Search for Motor Primitives in Robotics},
  booktitle     = {Advances in Neural Information Processing Systems 21: 22nd Annual Conference on Neural Information Processing Systems 2008},
  editor        = {Koller, D. and Schuurmans, D. and Bengio, Y. and Bottou, L.},
  pages         = {849--856},
  publisher     = {Curran},
  address       = {Red Hook, NY, USA},
  month         = jun,
  year          = {2009},
  internal-note = {Export metadata: organization Max-Planck-Gesellschaft; school Biologische Kybernetik}
}

% Finn, Abbeel, Levine (2017): arXiv e-print 1703.03400, primary class cs.LG,
% recorded through the eprint / archivePrefix / primaryClass fields.
@misc{finn2017modelagnostic,
  author        = {Chelsea Finn and Pieter Abbeel and Sergey Levine},
  title         = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
  year          = {2017},
  eprint        = {1703.03400},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
